智能计算系统2 bangc算子开发的demo (CPU和MLU270的异构编程流程) 您所在的位置:网站首页 寒武纪 cpu 智能计算系统2 bangc算子开发的demo (CPU和MLU270的异构编程流程)

智能计算系统2 bangc算子开发的demo (CPU和MLU270的异构编程流程)

2024-07-11 20:53| 来源: 网络整理| 查看: 265

文章目录
1. 首先加载环境
2. 代码
2.1 Makefile 项目管理编写
2.2 MLU270需要执行的代码
2.3 CPU上执行的代码调度MLU
2.4 主函数
2.5 实验结果

本文主要介绍如何利用寒武纪 BANG C 语言编写 MLU 端算子, 并在 CPU 端通过 CNRT 运行时库 (cnrt.h) 进行调度, 实现 CPU 与 MLU 异构编程的流程

本文的独立代码放在 gitee 中; 另一个实验是后面 bangc 实验一中的 powerDifference

这是《智能计算系统》一书第 259 页累加例子的实现

整个流程如下图所示

在这里插入图片描述

1. 首先加载环境

环境位置 /home/zjq/AICSE-demo-student/env/env.sh

修改环境脚本的内容, 把其中的路径改成相对路径

#!/bin/bash
export AICSE_MODELS_MODEL_HOME=/opt/Cambricon-Test/models
export AICSE_MODELS_DATA_HOME=/opt/Cambricon-Test/datasets/
#export AICSE_MODELS_MODEL_HOME=$PWD/../data/models
#export AICSE_MODELS_DATA_HOME=$PWD/../data/data
export NEUWARE=$PWD/neuware
export NEUWARE_HOME=$PWD/neuware
export TENSORFLOW_MODELS_DATA_HOME=$AICSE_MODELS_DATA_HOME
export PATH=$PATH:$NEUWARE/bin
export PATH=$PATH:/usr/local/neuware/bin
unset LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$NEUWARE/lib64
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/neuware/lib64
source /etc/profile

运行环境 source env.sh

为了方便, 执行下面代码, 就可以通过bangcstart每次直接进入环境了

echo 'alias bangcstart="pushd /home/zjq/AICSE-demo-student/env/ && source env.sh && pushd"' >> ~/.bashrc
source ~/.bashrc

2. 代码

下载代码 cd /home/zjq && git clone https://gitee.com/nwu_zjq/cambrian-demo.git

下面的代码是例子 powerDifference, 对应于《智能计算系统》里面的实验一: bangc 算子实验

只不过因为里面的项目管理有点混乱, 为了更有条理地理解整个CPU和MLU的异构编程流程, 我重新整理了项目, 利用Makefile进行管理, 按照惯例 src 放源码, include 放头文件

其实项目的原始代码是在 /home/zjq/AICSE-demo-student/demo/style_transfer_bcl/src/bangc/PluginPowerDifferenceOp/

2.1 Makefile 项目管理编写 # 需要运行 pushd /home/zjq/AICSE-demo-student/env/ && source env.sh && pushd CNCCParams = --bang-mlu-arch=MLU200 GPPParams = -g -std=c++11 -Iinclude -I${NEUWARE_HOME}/include # 这里设置的是C++11标准, include是头文件 -g是调试 LINKParams = -L ${NEUWARE_HOME}/lib64 -lcnrt object= obj/plugin_power_difference_kernel.o obj/plugin_power_difference_op.o obj/main.o obj/%.o: src/%.mlu mkdir -p $(@D) cncc ${CNCCParams} -o $@ -c $ quotient+=1; } // TODO:内存申请 __nram__ half inputx_nram[ONELINE]; __nram__ half inputy_nram[ONELINE]; __nram__ half temp_nram[ONELINE]; // TODO:For循环计算 for (int i = 0; i __bang_mul(temp_nram,temp_nram,temp_nram,ONELINE); } // TODO:结果拷出操作 __memcpy(output+i*ONELINE,temp_nram,ONELINE*sizeof(half),NRAM2GDRAM); } } 对应的头文件 // /home/zjq/cambrian-demo/powerDifference/include/plugin_power_difference_kernel.h #ifndef _PLUGIN_POWER_DIFFERENCE_KERNEL_H_ #define _PLUGIN_POWER_DIFFERENCE_KERNEL_H_ #ifdef __cplusplus extern "C" { #endif #include #include "cnrt.h" // 调用mlu的库函数 #include "cnrt_data.h" #include "stdio.h" typedef uint16_t half; // TODO:BCL接口定义 // void PowerDifferenceKernel(---); void PowerDifferenceKernel(half* input1,half* input2,int32_t pow, half* output,int32_t dims_a); #ifdef __cplusplus } #endif #endif // _PLUGIN_POWER_DIFFERENCE_KERNEL_H_ 2.3 CPU上执行的代码调度MLU

plugin_power_difference_op.cpp 完成了CPU上内存数据的开辟以及与MLU之间的数据传递, 并且封装成 op 的 API, 供 main 函数调用

// /home/zjq/cambrian-demo/powerDifference/src/plugin_power_difference_op.cpp #include "cnrt.h" // 调用mlu的库函数 #include "cnrt_data.h" #include "plugin_power_difference_kernel.h" #include "plugin_power_difference_op.h" int MLUPowerDifferenceOp(float* input1,float* input2, int pow, float*output, int dims_a) { cnrtInit(0); // 初始化设备 cnrtDev_t dev; cnrtGetDeviceHandle(&dev, 0); cnrtSetCurrentDevice(dev); cnrtQueue_t pQueue; cnrtCreateQueue(&pQueue); // 设置任务划分, cnrtDim3_t dim; dim.x = 1; // 这里使单核, 如果是dim.x=4, 则是4核, 也就是一行能计算64*4=256位 dim.y = 1; dim.z = 1; float hardware_time = 0.0; cnrtNotifier_t event_start; cnrtNotifier_t event_end; cnrtCreateNotifier(&event_start); cnrtCreateNotifier(&event_end); cnrtFunctionType_t c = CNRT_FUNC_TYPE_BLOCK; //prepare data half* input1_half = (half*)malloc(dims_a * sizeof(half)); half* input2_half = (half*)malloc(dims_a * sizeof(half)); half* output_half = (half*)malloc(dims_a * sizeof(half)); cnrtConvertFloatToHalfArray(input1_half, input1, dims_a); cnrtConvertFloatToHalfArray(input2_half, input2, dims_a); cnrtConvertFloatToHalfArray(output_half, output,dims_a); half *mlu_input1, *mlu_input2, *mlu_output; if (CNRT_RET_SUCCESS != cnrtMalloc((void**)&mlu_input1, dims_a * sizeof(half))) { printf("cnrtMalloc Failed!\n"); exit(-1); } if (CNRT_RET_SUCCESS != cnrtMalloc((void**)&mlu_input2, dims_a * sizeof(half))) { printf("cnrtMalloc Failed!\n"); exit(-1); } if (CNRT_RET_SUCCESS != cnrtMalloc((void**)&mlu_output, dims_a * sizeof(half))) { printf("cnrtMalloc output Failed!\n"); exit(-1); } // TODO:完成cnrtMemcpy拷入函数 cnrtMemcpy(mlu_input1,input1_half,dims_a*sizeof(half),CNRT_MEM_TRANS_DIR_HOST2DEV); cnrtMemcpy(mlu_input2,input2_half,dims_a*sizeof(half),CNRT_MEM_TRANS_DIR_HOST2DEV); //kernel parameters cnrtKernelParamsBuffer_t params; cnrtGetKernelParamsBuffer(&params); cnrtKernelParamsBufferAddParam(params, &mlu_input1, sizeof(half*)); cnrtKernelParamsBufferAddParam(params, &mlu_input2, sizeof(half*)); cnrtKernelParamsBufferAddParam(params, &pow, 
sizeof(int)); cnrtKernelParamsBufferAddParam(params, &mlu_output, sizeof(half*)); cnrtKernelParamsBufferAddParam(params, &dims_a, sizeof(int)); cnrtPlaceNotifier(event_start, pQueue); // TODO:完成cnrtInvokeKernel函数 cnrtInvokeKernel_V2((void*)&PowerDifferenceKernel,dim,params,c,pQueue); if (CNRT_RET_SUCCESS != cnrtSyncQueue(pQueue)) { printf("syncQueue Failed!\n"); exit(-1); } cnrtPlaceNotifier(event_end, pQueue); //get output data // TODO:完成cnrtMemcpy拷出函数 cnrtMemcpy(output_half,mlu_output,dims_a*sizeof(half),CNRT_MEM_TRANS_DIR_DEV2HOST); cnrtConvertHalfToFloatArray(output, output_half,dims_a ); //free data if (CNRT_RET_SUCCESS != cnrtFree(mlu_input1)) { printf("cnrtFree Failed!\n"); exit(-1); } if (CNRT_RET_SUCCESS != cnrtFree(mlu_input2)) { printf("cnrtFree Failed!\n"); exit(-1); } if (CNRT_RET_SUCCESS != cnrtFree(mlu_output)) { printf("cnrtFree output Failed!\n"); exit(-1); } if (CNRT_RET_SUCCESS != cnrtDestroyQueue(pQueue)) { printf("cnrtDestroyQueue Failed!\n"); exit(-1); } if (CNRT_RET_SUCCESS != cnrtDestroyKernelParamsBuffer(params)) { printf("cnrtDestroyKernelParamsBuffer Failed!\n"); return -1; } cnrtDestroy(); free(input1_half); free(input2_half); free(output_half); return 0; } 对应的头文件 // /home/zjq/cambrian-demo/powerDifference/include/plugin_power_difference_op.h #ifndef _PLUGIN_POWER_DIFFERENCE_OP_H_ #define _PLUGIN_POWER_DIFFERENCE_OP_H_ #ifdef __cplusplus extern "C" { #endif #include #include "stdio.h" int MLUPowerDifferenceOp(float* input1,float* input2, int pow, float*output, int dims_a); #ifdef __cplusplus } #endif #endif 2.4 主函数 // /home/zjq/cambrian-demo/powerDifference/src/main.cpp #include #include #include "stdio.h" #include #include #include "plugin_power_difference_op.h" // 这里包含CPU和MLU交互逻辑 #define DATA_COUNT 32768 #define POW_COUNT 2 // int MLUPowerDifferenceOp(float* input1,float* input2, int pow, float*output, int dims_a); int main() { float* input_x = (float*)malloc(DATA_COUNT * sizeof(float)); float* input_y = (float*)malloc(DATA_COUNT * 
sizeof(float)); float* output_data = (float*)malloc(DATA_COUNT * sizeof(float)); float* output_data_cpu = (float*)malloc(DATA_COUNT * sizeof(float)); FILE* f_input_x = fopen("./data/in_x.txt", "r"); FILE* f_input_y = fopen("./data/in_y.txt", "r"); FILE* f_output_data = fopen("./data/out.txt", "r"); struct timeval tpend, tpstart; float err = 0.0; float cpu_sum = 0.0; float time_use = 0.0; if (f_input_x == NULL|| f_input_y == NULL || f_output_data == NULL) { printf("Open file fail!\n"); return 0; } gettimeofday(&tpstart, NULL); srand((unsigned)time(NULL)); for (int i = 0; i err +=fabs(output_data_cpu[i] - output_data[i]) ; cpu_sum +=fabs(output_data_cpu[i]); } printf("err rate = %0.4f%%\n", err*100.0/cpu_sum); return 0; }

在这里插入图片描述

2.5 实验结果

root@localhost:/home/zjq/cambrian-demo/powerDifference# make
/*
g++ obj/plugin_power_difference_kernel.o obj/powerDiff.o obj/main.o -o main -L /home/zjq/AICSE-demo-student/env/neuware/lib64 -lcnrt
./main
get data cost time 27.130000 ms
CNRT: 4.2.1 fa5e44c
compute data cost time 31.934000 ms
input x 139.000000 input y 70.000000 output data 4760.000000 output data 15872.000000 output data 14880.000000
err rate = 0.0117%
*/


【本文地址】

公司简介

联系我们

今日新闻

    推荐新闻

    专题文章
      CopyRight 2018-2019 实验室设备网 版权所有